home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
TeX 1995 July
/
TeX CD-ROM July 1995 (Disc 1)(Walnut Creek)(1995).ISO
/
web
/
noweb
/
src
/
icon
/
l2h.nw
(
.txt
)
< prev
next >
Wrap
LaTeX Document
|
1995-02-24
|
56KB
|
1,582 lines
\documentstyle[noweb,multicol]{article}
\title{Converting {\LaTeX} to HTML}
\author{Norman Ramsey\\{\tt norman@bellcore.com}}
\noweboptions{smallcode}
\setcounter{secnumdepth}{1}
\begin{document}
\pagenumbering{roman}
\maketitle
\tableofcontents
\pagenumbering{arabic}
\section{Introduction}
This program provides an infrastructure for converting {\LaTeX} to
HTML.
That infrastructure can be used to make a {\tt noweb} filter or to
make a standalone conversion program.
The program is roughly divided into three parts.
Section~\ref{cs-decls} assigns a meaning (treatment) to each control
sequence.
It is roughly declarative, and the hope is that one day it can
be replaced by a data file, which could be augmented dynamically.
(The central flaw in this program is that all {\TeX} control
sequences must be hard-wired.)
Section~\ref{engine} describes the engine used to do the conversion, and
Sections \ref{imp-decl}~and~\ref{html-format} give the procedures that do the individual conversions.
\section{Descriptions of control sequences}
\label{cs-decls}
This section defines behavior for each control sequence we know how to
convert.
The definitions have a declarative flavor, since most are done by
procedure calls.
These calls initialize the machinery described in Section~\ref{cs-tables}.
{\LaTeX} control sequences come first, using
the same organization as the quick reference card from the
second edition of the {\LaTeX} manual.
Other control sequences follow.
\subsection{{\LaTeX} control sequences}
\subsubsection{Sentences and paragraphs}
<<control-sequence assignments>>=
# Control sequences for spacing and for characters that stand for themselves.
substitution(",", " ")
substitution(" ", " ")
substitution("\n", "\n")
substitution("\t", " ")
ignore("@")
ignore("/")             # no italic correction
substitution("", "\n")  # \<newline> treated as request for newline
# \$ \% \# \{ \} \_ each produce the character itself.
every c := !"$%#{}_" do
  substitution(c, c)
# "&" is an HTML special, so \& must produce the entity rather than a bare "&".
substitution("&", "&amp;")
argblock("emph", "em")
argblock("footnote", "<b>[</b>", "<b>]</b>")  # put footnotes in bold brackets
argblock("footnotetext", "<b>[</b>", "<b>]</b>")
argblock("thanks", "<b>[</b>", "<b>]</b>")    # put footnotes in bold brackets
substitution("today", &date)
\subsubsection{Type style}
<<control-sequence assignments>>=
ignore("textrm") # html can't switch to default font!
argblock("textit", "i")
argblock("textbf", "b")
argblock("textsl", "i")
ignore("textsc")
argblock("texttt", "tt")
ignore("textsf")
ignore("boldmath")
# \boldmath could be done by introducing S.mathfont, but I don't want to!
<<control-sequence assignments>>=
ignore("mathrm") # html can't switch to default font!
argblock("mathit", "i")
argblock("mathbf", "b")
argblock("mathtt", "tt")
ignore("mathsf")
argblock("mathcal", "i")
@ HTML has only one size.
<<control-sequence assignments>>=
every ignore("tiny" | "scriptsize" | "footnotesize" | "small" | "normalsize" |
"large" | "Large" | "LARGE" | "huge" | "Huge")
\subsubsection{Accents and symbols}
I couldn't find an official way to do symbols.
Maybe when CERN comes back up I can find the ISO Latin~1 character set.
<<control-sequence assignments>>=
every accent(key(accent_name))
every ignore("dag" | "ddag" | "S" | "P" | "copyright" | "pounds")
\subsubsection{Sectioning and table of contents}
<<control-sequence assignments>>=
argblockv("part", "h1", &null, "*[")
argblockv("chapter", "h1", &null, "*[")
argblockv("section", "h2", &null, "*[")
argblockv("subsection", "h3", &null, "*[")
argblockv("subsubsection", "h4", &null, "*[")
argblockv("paragraph", "h5", &null, "*[")
argblockv("subparagraph", "h6", &null, "*[")
ignore("appendix")
auxfile("tableofcontents", "toc", "<p><b>[Table of contents]</b><p>",
"<h2>Table of Contents</h2>")
cstab["tableofcontents"] := Ctableofcontents # override to call set_toclevel
ignore("listoftables")
\subsubsection{Mathematical formulas}
Here we see our first assignments to [[cstab]], which is the real
technology underlying these seemingly declarative calls.
I'll assign to [[cstab]] directly when some really special behavior is
called for. In this case, it's going in and out of math mode.
<<control-sequence assignments>>=
# \( \) \[ \] enter and leave math mode, so they are bound directly in cstab.
cstab["("] := Cmath
cstab[")"] := Cmath_end
cstab["["] := Cdisplaymath
cstab["]"] := Cdisplaymath_end
ignoreenv("equation")
every table_env(star("eqnarray"), 0, " ", "blockquote") # also lame
# No real rendering for these: just show the operator name in bold.
substitution("frac", "<b>frac</b>")
substitution("sqrt", "<b>sqrt</b>")   # previously rendered as "frac" (copy-paste slip)
every substitution("ldots" | "cdots" | "vdots", "...")
ignore("left")
ignore("right")
ignore("overline")
# Math spacing commands \: and \; become a plain space; \! disappears.
substitution(":", " ")
substitution(";", " ")
ignore("!")
@ The [[star]] procedure lets us define \verb+eqnarray+ and
\verb+eqnarray*+ in one fell swoop.
<<*>>=
# Generate a control-sequence or environment name and then its starred
# variant, i.e. cs followed by cs || "*".  Intended for use under `every`.
procedure star(cs)
  suspend cs | (cs || "*")
end
There are a gazillion symbols. I'll add them on demand.
<<control-sequence assignments>>=
# Replacement strings are emitted as HTML (see the tags in the \bmod entry),
# so literal "<" and ">" must be written as entities.
substitution("Diamond", "&lt;&gt;")
substitution("langle", "&lt;")
substitution("rangle", "&gt;")
substitution("le", "&lt;=")
substitution("ge", "&gt;=")
substitution("bmod", "</i>mod<i>") # better hook in with math
substitution("equiv", "===")
\subsubsection{Displayed paragraphs}
HTML really has only one kind of displayed paragraph---the block quotation.
<<control-sequence assignments>>=
envblock("quote", "blockquote")
envblock("quotation", "blockquote")
envblock("center", "blockquote")
envblock("flushleft", "blockquote")
envblock("flushright", "blockquote")
envblock("verse", "blockquote")
begintab["verbatim"] := Cverbatim
begincl["verbatim"] := verbatim_cl("pre", "\\end{verbatim}")
cstab["verb"] := Cverb
\subsubsection{Lists}
<<control-sequence assignments>>=
cstab["item"] := Citem
csclosure["item"] := [item_cl("<li>", "", "<li>")]
listenv("itemize", "ul")
listenv("enumerate", "ol")
listenv("description", "dl")
\subsubsection{???}
<<control-sequence assignments>>=
ignore("documentstyle", "[{")
ignore("documentclass", "[{")
ignore("usepackage", "[{")
ignore("pagestyle", "{")
ignore("pagenumbering", "{")
\subsubsection{Title page and abstract}
I could be clever and have \verb+\title+ have a side effect
that sticks in the right boilerplate when we see \verb+\begin{document}+,
but for now it's not worth the hassle.
<<control-sequence assignments>>=
argblockv("title", "h1")
argblockv("author","address")
argblockv("date", "b")
substitution("maketitle", "<!--title goes here-->")
ignoreenv("titlepage")
envblock("abstract", "<h2>Abstract</h2><blockquote>", "</blockquote>")
\subsubsection{Cross-reference}
A more ambitious scheme would make labels anchor at preceding
sectioning commands, but it's hard to see how to do that in one pass.
Instead, I just use some conventional glyphs.
I use special procedures for the cross-references so I can have an
arrow pointing either forward or backward, depending on the direction
of the reference.
<<control-sequence assignments>>=
cstab["label"] := Clabel
cstab["ref"] := Cref
cstab["pageref"] := Cref
\subsubsection{Bibliography and citation}
For the bibliography, I actually go grubbing for a {\tt .bbl} file if
I can find one.
<<control-sequence assignments>>=
ignore("bibliographystyle", "{")
auxfile("bibliography", "bbl", "<b>[BibTeX bibliography]</b>", &null, "{")
envblock("thebibliography", "<h2>References</h2>", "", "{")
cstab["cite"] := Ccite
cstab["bibitem"] := Cbibitem
ignore("newblock")
ignore("nocite", "{")
\subsubsection{Splitting the input}
All input is ignored. Those things are in their own files.
<<control-sequence assignments>>=
every ignore("input" | "include" | "includeonly", "{")
# filecontents not done yet
\subsubsection{Line breaking}
<<control-sequence assignments>>=
substitution("\\", "<br>", "[")
substitution("linebreak", "<br>")
ignore("-")
ignoreenv("sloppypar")
ignore("sloppy")
\subsubsection{Page breaking}
I simulate forced page breaks by horizontal rules.
<<control-sequence assignments>>=
substitution("pagebreak", "<hr>")
substitution("newpage", "<hr>")
substitution("clearpage", "<hr>")
ignore("enlargethispage", "*{")
\subsubsection{Boxes}
<<control-sequence assignments>>=
ignore("mbox")
ignore("makebox", "([[") # ( comes from picture area
ignore("fbox")
ignore("framebox", "[[") # could insert horizontal rules, but why?
ignore("newsavebox", 1)
ignore("sbox", 2)
ignore("savebox", "{[[{")
ignore("usebox", 1)
envblock("minipage", "blockquote", &null, "[{")
argblock("parbox", "blockquote", &null, "[{")
\subsubsection{Space}
<<control-sequence assignments>>=
ignore("hspace", "*{")
ignore("hfil")
ignore("hfill")
ignore("vspace", "*{")
ignore("vfil")
ignore("vfill")
\subsubsection{Length}
<<control-sequence assignments>>=
ignore("newlength", "{")
ignore("setlength", "{{")
ignore("addtolength", "{{")
\subsubsection{Pictures}
<<control-sequence assignments>>=
envblock("picture", "<b>[picture]</b>", "", "((")
ignore("put", "({")
ignore("multiput", "(({{")
ignore("dashbox", "{([")
ignore("line", "({")
ignore("vector", "({")
ignore("shortstack", "[")
ignore("circle", "*{")
ignore("oval", "([")
ignore("frame")
ignore("thinlines")
ignore("thicklines")
\subsubsection{Figures and Tables}
I surround figures and tables with horizontal rules.
<<control-sequence assignments>>=
every envblock(star("figure"), "<hr>", "<hr>", "[")
every envblock(star("table"), "<hr>", "<hr>", "[")
argblock("caption", "b") # captions in bold
\subsubsection{{\tt tabbing} environment}
I can't see how to do anything sensible with {\tt tabbing}.
<<control-sequence assignments>>=
envblock("tabbing", "blockquote")
# \= is accent
ignore(">")
ignore("+")
ignore("kill")
\subsubsection{{\tt array} and {\tt tabular} environment}
<<control-sequence assignments>>=
envblock("array", "blockquote", &null, "[{")
envblock("tabular", "blockquote", &null, "[{")
envblock("tabularx", "blockquote", &null, "[{")
ignore("multicolumn", "{{")
substitution("hline", "<hr>")
ignore("cline", "{")
\subsubsection{Definitions}
<<control-sequence assignments>>=
ignore("newcommand", "A[[{")
ignore("renewcommand", "A[[{")
ignore("newenvironment", "{[{{")
ignore("renewenvironment", "{[{{")
ignore("newtheorem", "{{")
\subsubsection{Numbering}
We have to have a special [[setcounter]] so we can ignore the right stuff
in the table of contents.
<<control-sequence assignments>>=
cstab["setcounter"] := Csetcounter
ignore("addtocounter", "{{")
\subsubsection{Other {\LaTeX} control sequences}
<<control-sequence assignments>>=
cstab["makeatletter"] := Cmakeatletter
cstab["makeatother"] := Cmakeatother
Here are all the old-style font changes.
<<control-sequence assignments>>=
fontchange("tt", "tt")
fontchange("bf", "b")
fontchange("it", "i")
fontchange("sl", "i")
fontchange("em", "em")
ignore("rm") # html can't switch to default font!
ignore("sf")
@ And some new ones
<<control-sequence assignments>>=
ignore("rmfamily")
<<control-sequence assignments>>=
ignoreenv("document")
<<control-sequence assignments>>=
substitution("LaTeX", "LaTeX")
<<control-sequence assignments>>=
ignore("numberline", "{")
ignore("protect")
ignore("twocolumn", "C")
ignore("typeout", "[{")
ignore("closedbib")
<<control-sequence assignments>>=
every ignore("leftmargini" | "leftmarginii" | "labelsep" | "fboxsep", "=")
every ignore("tabcolsep", "=")
\subsection{Control sequences from various {\LaTeX} packages}
<<control-sequence assignments>>=
ignoreenv("multicols", "{C")
cstab["citeN"] := Ccite
ignore("afterpage", "{")
A (perhaps vain) attempt to implement \verb+\kill+.
<<control-sequence assignments>>=
cstab["kill"] := Ckill
\subsection{Plain {\TeX} control sequences}
<<control-sequence assignments>>=
argblock("noalign", "<br>", "<br>") # not clear what else to do...
argblock("centerline", "<br>", "<br>")
substitution("cr", "<br>")
substitution("hrule", "<hr>")
substitution("vrule", "|")
substitution("hrulefill", "------")
ignore("hbox")
ignore("rlap")
ignore("llap")
ignore("vbox")
ignore("vtop")
ignore("message", "{")
ignore("relax")
ignore("null")
ignore("offinterlineskip")
<<control-sequence assignments>>=
cstab["par"] := implicit_paragraph
cstab["smallskip"] := implicit_paragraph
cstab["medskip"] := implicit_paragraph
cstab["bigskip"] := implicit_paragraph
cstab["vskip"] := implicit_paragraph
csclosure["vskip"] := "="
We can't give the grouping control sequences their real meaning, because
that would blow our brace balance when ignoring definitions and the like.
The proper solution would be to distinguish between grouping and braces,
but that would require much more sophistication than we've got just now.
<<control-sequence assignments>>=
every ignore("begingroup" | "endgroup" | "bgroup" | "egroup")
<<control-sequence assignments>>=
cstab["newif"] := Cnewif
cstab["iffalse"] := Ciffalse
cstab["iftrue"] := Ciftrue
cstab["else"] := Celse
cstab["fi"] := Cfi
cstab["ifx"] := cstab["if"] := cstab["ifnum"] := Ciffalse
Lots of assignable things:
<<control-sequence assignments>>=
ignore("let", "A=")
every ignore("hfuzz" | "parindent" | "parskip" | "baselineskip", "=")
every ignore("hbadness" | "hsize" | "vsize" | "overfullrule" | "tabskip", "=")
substitution("hskip", " ", "=")
<<control-sequence assignments>>=
ignore("unskip")
ignore("hss")
ignore("phantom", "{")
every ignore("kern" | "lower" | "spacefactor", "=") # a cheat, but works
every ignore("clubpenalty" | "widowpenalty", "=")
@ Other stuff to be ignored:
<<control-sequence assignments>>=
every ignore("expandafter" | "indent" | "noindent" | "leavevmode" | "strut")
ignore("def", 1)
<<control-sequence assignments>>=
substitution("TeX", "TeX")
\subsection{Other control sequences}
I get to include my favorite {\TeX} hacks.
We define ignoring loosely; the count denotes the number of balanced-brace pairs.
We also ignore everything before an ignored balanced-brace pair, which means
it works for \verb+\def+.
<<control-sequence assignments>>=
ignore("noweboptions", 1)
Now, here are a couple of righteous hacks!
The idea is that most views will ignore this stuff, but the indexer might
use it to get clever about dumping chunks and all in the right places.
<<control-sequence assignments>>=
substitution("nowebindex", "<nowebindex>")
substitution("nowebchunks", "<nowebchunks>")
ignore("nowebsize")
<<control-sequence assignments>>=
envblock("fields", "blockquote", &null, "[") # lame; could try to <tt> 1st col
envblock("fields*", "blockquote", &null, "{") # lame; could try to <tt> 1st col
ignore("citeauthoryear", "{{{")
ignore("authoryear", "{{")
substitution("bibrule", "--------")
let("bibskip", "par")
every cstab["anoncite"|"authorcite"] := Ccite
This will always have to be patched by hand, but it may be worth it.
<<control-sequence assignments>>=
argblock("psfig", "<a href=\"", "\">PostScript</a>")
ignore("pssilent")
ignore("psnoisy")
\section{The conversion engine}
\label{engine}
The converter doesn't have the luxury of working on the whole text at
once; instead it has to accept and convert a piece at a time.
If I really understood co-expressions, I would surely make them sit up
and beg.
Since I don't, I keep some state around, and I pass continuations and
closures like there's no tomorrow.
\subsection{Basic conversion}
Here's the basic engine, which works by string scanning.
The initial boilerplate sets up the second argument (if any) as
[[&subject]].
We have the odd specials [["\0"]] and [["\1"]], which are
used to delimit quoted code in noweb.
Woe betide the hapless user who has real nulls or 1s in his {\LaTeX} file.
<<*>>=
# Convert a piece of LaTeX to HTML using converter state S.
#   S         -- converter state (see record state)
#   optstring -- optional text to convert; when supplied it becomes &subject
#                for a recursive call, otherwise the current &subject is scanned
# Returns the text converted so far.
# The first call also runs the one-time table initialization.
procedure convert(S, optstring)
  static specials
  initial {
    <<initialization>>
    <<control-sequence assignments>>
    <<assign to dynamic-add table>>
    # characters that interrupt plain-text accumulation
    specials := '\\{}<>"%$&~\n\0\1'
  }
  if \optstring then return optstring ? convert(S)
  else {
    <<scan, convert, and return result>>
  }
end
If I were a good dog, I would make a state diagram.
Since I'm not, I'll just say that we either
accumulate text using the function [[S.text]], which exists for that
purpose, or else we do something special upon encountering a special character.
The [[<<take actions appropriate to new text>>]]
chunk may do something special with the text in
case we're not in the default state (for example, we may be scanning
for the end of a comment).
Encountering a non-threatening character throws the converter into
horizontal mode.
<<scan, convert, and return result>>=
<<take actions appropriate to new text>>
# Any ordinary character moves the converter from vertical to horizontal mode.
if S.mode == "V" & any(~'\\{}<>%\n\t ') then S.mode := "H"
emit_text(S, tab(upto(specials) | 0))
while not pos(0) do
  if S.mode == "Q" then { # quoting: pass text through untouched up to "\1"
    emit_text(S, tab(upto('\1') | 0))
    if ="\1" then {
      emit_text(S, "\1")
      S.mode := "H"
    }
  } else {
    case move(1) of {
      "\\" : {<<control sequence>>}
      "{"  : {<<take open-group actions>>}
      "}"  : {<<take close-group actions>>}
      "%"  : {<<comment>>}
      "~"  : emit_text(S, " ") # should be &nbsp; but netscape doesn't support it
      "\n" : {<<newline>>}
      "$"  : {<<dollar sign>>}
      "&"  : {<<ampersand>>}
      "\0" : {S.mode := "Q"; emit_text(S, "\0")}
      # remaining cases simply escape HTML specials
      "<"  : emit_text(S, "&lt;")
      ">"  : emit_text(S, "&gt;")
      "\"" : emit_text(S, "&quot;")
    }
    # copy the plain text that runs up to the next special character
    emit_text(S, tab(upto(specials) | 0))
  }
# hand back the accumulated text and clear the accumulator
return 1(.S.the_text, S.the_text := "") # what's been converted
The definition of a converter's state is distributed.
We've already seen the use of [[mode]].
<<*>>=
record state(mode <<other fields of state>>)
# mode is "V" (vertical, the default for a new converter), "H" (horizontal),
# "M" (math), or "Q" (copying quoted noweb code between "\0" and "\1").
# The remaining fields are contributed by the <<other fields of state>> chunks.
To create a new state, the default mode is vertical
<<*>>=
# Create a fresh converter state.
#   mode -- initial mode; defaults to "V" (vertical) when omitted
# Returns a new state record.
procedure converter(mode)
  /mode := "V"
  return state(mode <<initial values for other fields of state>>)
end
To avoid repeated memory allocation, we provide a routine to reset a
converter to its initial state.
<<*>>=
# Put converter S back into its initial condition so it can be reused
# without allocating a new state record.  Returns S.
procedure reset(S)
  <<code to reset [[S]]>>
  return S
end
The basic action performed by the
[[S.text]] function is to accumulate converted text in [[S.the_text]].
[[S.text]] is usually [[accumulate_text]].
<<*>>=
# Default S.text handler: append text to the converted output held in
# S.the_text.
procedure accumulate_text(S, text)
  S.the_text ||:= text
  return
end
<<other fields of state>>=
, text, the_text
<<initial values for other fields of state>>=
, accumulate_text, ""
<<code to reset [[S]]>>=
S.text := accumulate_text
S.the_text := ""
[[emit_text]] just uses the current value of [[S.text]], provided we aren't
currently ignoring tokens.
Its primary use is to appear in closures, when we don't know what
[[S.text]] will be when the closure is executed.
<<*>>=
# Send text through whatever S.text currently is, unless S.ignoring is set,
# in which case the text is dropped and the empty string is returned.
procedure emit_text(S, text)
  return if \S.ignoring then "" else S.text(S, text)
end
<<other fields of state>>=
, ignoring
<<initial values for other fields of state>>=
, &null
<<code to reset [[S]]>>=
S.ignoring := &null
\subsection{Action and continuation hooks}
We provide hooks so that actions can be taken at various points.
The major ones are:
\begin{description}
\item[\tt newtext]
When the next string is passed in for conversion.
\item[open brace]
After the next open brace or begin environment.
\item[close brace]
Before the next close brace or end environment.
\end{description}
\subsubsection{{\tt newtext}}
[[newtext]] is a list of closures to be executed (actions to take)
when the next input comes.
<<other fields of state>>=
, newtext
<<initial values for other fields of state>>=
, []
<<code to reset [[S]]>>=
# newtext holds the closures to run when the next input arrives.  The initial
# value above must stay in step with the field list: state(...) is built
# positionally, so a missing value shifts every later field.
S.newtext := []
A closure is simply a procedure with arguments.
<<*>>=
# proc is the procedure to call; args is the list of arguments it is applied
# to (closures are run as c.proc!c.args).
record closure(proc, args)
[[before_next_newtext]] and [[after_next_newtext]]
add to the list of actions to be taken (at the left and right, respectively).
<<*>>=
# Queue proc(args...) at the front of S.newtext, so it runs before the other
# actions pending for the next input.
procedure before_next_newtext(S, proc, args)
  push(S.newtext, closure(proc, args))
end
# Queue proc(args...) at the back of S.newtext, so it runs after the other
# actions pending for the next input.
procedure after_next_newtext(S, proc, args)
  put(S.newtext, closure(proc, args))
end
When taking the actions, be careful to avoid an infinite loop, e.g., on empty lines.
<<take actions appropriate to new text>>=
# Detach the pending list before running it: a closure that re-queues itself
# then lands on the fresh S.newtext and is not run again in this loop.
l := S.newtext
S.newtext := []
while c := get(l) do
c.proc!c.args
Some control sequences temporarily override all actions to be taken on
a new input, using [[delay_newtext]].
[[undelay_newtext]] restores actions.
<<*>>=
# Set the pending new-text actions aside in S.delayed_newtext and start
# with an empty action list.  Undone by undelay_newtext.
procedure delay_newtext(S)
  S.delayed_newtext := S.newtext
  S.newtext := []
  return
end
# Restore the new-text actions saved by delay_newtext.
# If nothing was saved, report the problem on &errout and then force a
# run-time error by subscripting &null.
procedure undelay_newtext(S)
  S.newtext := \S.delayed_newtext |
     {write(&errout, "This can't happen: null delayed_newtext"); &null[0]}
  S.delayed_newtext := &null
end
<<other fields of state>>=
, delayed_newtext
<<initial values for other fields of state>>=
, &null
<<code to reset [[S]]>>=
S.delayed_newtext := &null
\subsubsection{Opening and closing groups}
There's only one list of actions to be taken at the next open,
but there's a whole stack of lists of actions to be taken at closes.
<<other fields of state>>=
, open, closes
<<initial values for other fields of state>>=
, [], []
<<code to reset [[S]]>>=
every S.open | S.closes := []
<<*>>=
# Queue proc(args...) to run after the next group is opened.
procedure after_next_open(S, proc, args)
  return put(S.open, closure(proc, args))
end
# Queue proc(args...) at the front of the closing actions of the innermost
# open group.  With no group open, S.closes[1] fails and the action is dropped.
procedure before_next_close(S, proc, args)
  return push(S.closes[1], closure(proc, args)) # lost at top level
end
# Queue proc(args...) at the back of the closing actions of the innermost
# open group.  With no group open, S.closes[1] fails and the action is dropped.
procedure after_next_close(S, proc, args)
  return put(S.closes[1], closure(proc, args)) # lost at top level
end
<<take open-group actions>>=
push(S.closes, []) # fresh set of closing tasks
# run and drain the actions queued by after_next_open
while c := get(S.open) do
c.proc!c.args
<<take close-group actions>>=
# run the closing tasks of the innermost group, then discard its emptied list
while c := get(S.closes[1]) do
c.proc!c.args
pop(S.closes)
<<old>>=
# (Retired code, kept in the <<old>> chunk.)  Treat the control sequence as
# an opening brace.
procedure Cbegingroup(S, cs, cl)
  <<take open-group actions>>
end
<<old>>=
# (Retired code, kept in the <<old>> chunk.)  Treat the control sequence as
# a closing brace.
procedure Cendgroup(S, cs, cl)
  <<take close-group actions>>
end
<<old control-sequence assignments>>=
cstab["begingroup"] := Cbegingroup
cstab["endgroup"] := Cendgroup
cstab["bgroup"] := Cbegingroup
cstab["egroup"] := Cendgroup
\subsection{Handling control sequences and environments}
OK, to eat a control sequence, first scan it, then execute it using [[do_cs]].
[[S.csletters]] records the current set of ``letters'' for control
sequences (so we can interpret \verb+\makeatletter+).
<<control sequence>>=
# Scan the control-sequence name: empty at end of input, a run of "letters",
# or otherwise a single character.
cs := if pos(0) then ""
      else if any(S.csletters) then tab(many(S.csletters))
      else move(1)
# While text is being ignored, only \else, \fi and the \if-like control
# sequences are executed; everything else is skipped silently.
if /S.ignoring | cs == ("else"|"fi") | cstab[cs] === (Ciffalse|Ciftrue) then
  do_cs(S, cs)
else
  &null # error("### Ignoring \\", cs)
<<other fields of state>>=
, csletters
<<initial values for other fields of state>>=
, &letters
<<code to reset [[S]]>>=
S.csletters := &letters
To execute a control sequence, look up its procedure in [[cstab]],
and pass in the name of the control sequence, plus the closure
argument from [[csclosure]].
\label{cs-tables}
<<*>>=
global cstab, csclosure
# Execute control sequence cs: skip blanks after its name, then call the
# procedure registered in cstab with (S, cs, csclosure[cs]).
# If the blanks run to the end of the input or to a newline, skipblanks is
# scheduled to run first on the next input.
procedure do_cs(S, cs)
  tab(many(' \t')) # skip white space following CS
  if pos(0) | any('\n') then before_next_newtext(S, skipblanks, [S])
  (cstab[cs])(S, cs, csclosure[cs])
  return
end
<<initialization>>=
cstab := table(unknown_cs)
csclosure := table()
The default action for an unknown control sequence is [[unknown_cs]].
If the global [[show_unknowns]] is set we dump the control sequence into the
output in bold. We save the unknown sequences for later warning messages.
<<*>>=
global show_unknowns
# Default cstab entry.  Optionally shows the control sequence in bold in the
# output (when show_unknowns is set) and records it in unknown_set.  The
# warning line is written only the first time a name is seen, and only when
# unknown_file is non-null.
procedure unknown_cs(S, cs, cl)
# if S.text === ignore_text then return # a bit of a hack -- should no longer be needed
  if \show_unknowns then S.text(S, "<b>\\" || cs || "</b>")
  if not member(unknown_set, cs) then {
    write(\unknown_file, "Warning: unknown control sequence \\", cs)
    insert(unknown_set, cs)
  }
  return
end
<<initialization>>=
unknown_set := set()
<<*>>=
global cstab, csclosure, unknown_set
The control sequences \verb+\begin+ and \verb+\end+ are treated
specially,
so we can have a similar machinery for environments.
<<*>>=
global begintab, endtab, begincl, endcl
# Handle \begin{env}: read the environment name, open a group, then call
# the procedure registered in begintab with (S, env, begincl[env]).
procedure do_begin(S, cs, cl)
  (="{", env := tab(upto('}')), ="}") | error("botched \\begin{...}")
  <<take open-group actions>>
  (begintab[env])(S, env, begincl[env])
  return
end
# Handle \end{env}: read the environment name, call the procedure registered
# in endtab with (S, env, endcl[env]), then close the group.
procedure do_end(S, cs, cl)
  (="{", env := tab(upto('}')), ="}") | error("botched \\end{...}")
# write(&errout, "calling ", image(endtab[env]), " for \\end{", env, "}")
  (endtab[env])(S, env, endcl[env])
  <<take close-group actions>>
  return
end
<<control-sequence assignments>>=
cstab["begin"] := do_begin
cstab["end"] := do_end
<<initialization>>=
every begintab | endtab := table(unknown_env)
every begincl | endcl := table()
<<*>>=
# Default begintab/endtab entry.  Optionally shows the environment name in
# bold in the output (when show_unknowns is set) and records it in
# unknown_envs.  The warning line is written only the first time a name is
# seen, and only when unknown_file is non-null.
procedure unknown_env(S, env, cl)
### if S.text === ignore_text then return # a bit of a hack # no longer needed
  if \show_unknowns then S.text(S, "<b>{" || env || "}</b>")
  if not member(unknown_envs, env) then {
    write(\unknown_file, "Warning: unknown environment {", env, "}")
    insert(unknown_envs, env)
  }
  return
end
<<initialization>>=
unknown_envs := set()
<<*>>=
global unknown_envs
\subsection{Issuing warnings about unknown control sequences and environments}
<<*>>=
# Report the members of s, sorted, through pushout.
#   s     -- collection of unknown names; nothing is printed when it is empty
#   type  -- word used in the "Unknown <type>: " heading
#   mark  -- optional text placed before each name (default "")
#   rmark -- optional text placed after each name (default "")
procedure warn_unknown(s, type, mark, rmark)
  if *s > 0 then {
    pushout("Unknown " || type || ": ")
    every pushout(((\mark | "")\1) || !sort(s) || ((\rmark | "")\1) || " ")
    pushout("\n")
  }
end
<<*>>=
# Write s to &errout, keeping a running column count and starting a new
# line when the count would reach 79.  Newlines inside s reset the count;
# the text around them is handled by recursive calls.
procedure pushout(s)
  static col
  initial col := 0
  if find("\n", s) then
    s ? {
      pushout(tab(upto('\n')))
      while ="\n" do {col := 0; write(&errout)}
      pushout(tab(0))
    }
  else {
    col +:= *s
    if col >= 79 then {writes(&errout, "\n "); col := *s + 2}
    writes(&errout, s)
  }
  return
end
\subsection{Procedures related to parsing {\TeX}}
\subsubsection{Comment-skipping}
This logic gobbles text into [[S.comment]]
until a newline is encountered, at which point it calls
[[Ccomment]] to format the comment.
All other new-text actions go on hold until the comment is over.
<<comment>>=
parse_dynamic_add()
delay_newtext(S)
eat_comment(S)
<<*>>=
# Accumulate comment text into S.comment up to the next newline.
# If the input runs out first, re-queue this procedure for the next input;
# otherwise restore the delayed new-text actions, hand the finished comment
# to Ccomment, and clear S.comment.
procedure eat_comment(S)
  S.comment ||:= tab(upto('\n') | 0)
  if pos(0) then
    before_next_newtext(S, eat_comment, [S])
  else {
    undelay_newtext(S)
    Ccomment(S)
    S.comment := ""
  }
  return
end
<<other fields of state>>=
, comment
<<initial values for other fields of state>>=
, ""
<<code to reset [[S]]>>=
# comment collects the text of the comment being scanned.  The initial value
# above must stay in step with the field list: state(...) is built
# positionally, so a missing value shifts every later field.
S.comment := ""
Verbatim text is a little bit like comment text.
For verbatim environment, we have a tag for the corresponding HTML,
plus a string that terminates the environment.
<<*>>=
record verbatim_cl(html, terminator)
# Begin verbatim text: emit the opening tag for cl.html (if any), put other
# new-text actions on hold, and start copying text up to cl.terminator.
procedure Cverbatim(S, cs, cl)
  S.text(S, tag(\cl.html))
  delay_newtext(S)
  do_verbatim(S, cl)
  return
end
If we find the terminator, we're finished.
Otherwise, we swallow the whole input and make sure our action on next
input is to continue scanning.
<<*>>=
# Copy verbatim text.  If cl.terminator occurs in the remaining input, copy
# up to it, consume it, emit the closing tag and restore the delayed
# new-text actions.  Otherwise copy the rest of the input and re-queue this
# procedure to continue on the next input.
procedure do_verbatim(S, cl)
  if verbatimout(S, tab(find(cl.terminator))) then {
    =cl.terminator
    S.text(S, endtag(\cl.html))
    undelay_newtext(S)
  } else {
    verbatimout(S, tab(0))
    before_next_newtext(S, do_verbatim, [S, cl])
  }
  return
end
When writing verbatim text, we still have to convert HTML specials.
<<*>>=
# Emit string s through S.text, replacing the HTML specials  " & < >  by
# their character entities so that verbatim text displays literally.
procedure verbatimout(S, s)
  s ? {
    # copy up to each special, then emit the matching entity
    while S.text(S, tab(upto('&<>"'))) do
      case move(1) of {
        "\"" : S.text(S, "&quot;")
        "&"  : S.text(S, "&amp;")
        "<"  : S.text(S, "&lt;")
        ">"  : S.text(S, "&gt;")
      }
    S.text(S, tab(0))   # whatever follows the last special
  }
  return
end
The \verb+\verb+ control sequence's terminator is the first character
following \verb+\verb+
<<*>>=
# Handle \verb: the character immediately following is the terminator, and
# the text up to its next occurrence is set verbatim inside <tt>.
procedure Cverb(S, cs, cl)
  Cverbatim(S, cs, verbatim_cl("tt", move(1)))
  return
end
\subsubsection{Arguments}
It's occasionally necessary to collect the argument of a control
sequence.
[[csarg]] does the job.
<<*>>=
# Collect the argument of a control sequence from the current subject.
# Returns the contents of a balanced {...} group if one starts here.
# Otherwise, after optional white space, returns either a control sequence
# (backslash plus letters or one character) or a single character.
procedure csarg(S)
  return 2(="{", tab(bal('}', '{', '}')), ="}") |
         (optwhite(),
          if ="\\" then
            "\\" || (tab(many(S.csletters)) | move(1))
          else
            move(1))
end
\subsubsection{Misc specials}
Ampersand is weak --- I just use some string depending on the environment.
Tables look sort of OK.
Notice that ampersands close and open groups.
<<ampersand>>=
<<take close-group actions>>
emit_text(S, S.ampersand)
<<take open-group actions>>
<<other fields of state>>=
, ampersand
<<initial values for other fields of state>>=
, " --- "
The dollar sign is for entering and exiting math mode:
<<dollar sign>>=
# Toggle math mode, unless text is currently being ignored.
if /S.ignoring then
# a second "$" follows: toggle display math
if ="$" then
if S.mode == "M" then { Cdisplaymath_end(S); S.mode := "V" }
else { Cdisplaymath(S); S.mode := "M" }
else
# single "$": toggle in-line math
if S.mode == "M" then { Cmath_end(S); S.mode := "H" }
else { Cmath(S); S.mode := "M" }
Newlines emit themselves, plus start skipping blanks until they get to
some nonblank text.
We have to identify a blank line so we can insert a paragraph marker.
<<newline>>=
emit_text(S, "\n")
if /S.ignoring then Cnewline(S)
<<*>>=
# Called after a newline: skip blanks and tabs; if another newline follows
# immediately (a blank line), start a paragraph.  If the input is exhausted,
# re-queue this procedure so the check continues on the next input.
procedure Cnewline(S)
  tab(many(' \t'))
  if match("\n") then implicit_paragraph(S)
  if pos(0) then before_next_newtext(S, Cnewline, [S])
end
Other procedures might want to skip white space, which includes
newlines, but we don't want to miss a paragraph.
<<*>>=
# Skip blanks and tabs.  A newline is consumed and handed to Cnewline so a
# following blank line still produces a paragraph; if the input runs out,
# skipping resumes on the next input.
procedure skipblanks(S)
  tab(many(' \t'))
  if ="\n" then Cnewline(S)
  else if pos(0) then before_next_newtext(S, skipblanks, [S])
end
Paragraphs count only in horizontal or math mode (and they better not
happen in math mode!).
<<*>>=
# Start a new paragraph unless already in vertical mode: switch to "V" and
# call Cparagraph.  When a closure argument cl is supplied (e.g. "=" for
# \vskip), it is passed to cs_ignore afterwards.
procedure implicit_paragraph(S, cs, cl)
  if S.mode ~== "V" then {
    S.mode := "V"
    Cparagraph(S)
  }
  cs_ignore(S, cs, \cl)
end
Here's a real hack. I use it to stop skipping blanks when the noweb
filter sees text quoted by [[[[...]]]].
That text is never converted, but we don't want to skip blanks that
follow it.
<<*>>=
# Cancel pending blank skipping: remove Cnewline and skipblanks closures
# from the front of S.newtext.
procedure stop_skipping(S)
  while S.newtext[1].proc === (Cnewline|skipblanks) do pop(S.newtext)
end
\subsubsection{Items}
For items, we actually want to do something with the optional arguments,
namely, convert them.
We wrap them in braces so that any font changes and so on will be
appropriately limited in their effects.
<<*>>=
record item_cl(before, after, ifnone)
# Handle \item.  cl is a stack of item_cl records; its top entry applies.
#   - at end of input: retry when more input arrives
#   - with an optional [..] argument: collect it and pass it to
#     convert_bracketed (other new-text actions are delayed meanwhile)
#   - otherwise: skip blanks and emit the "ifnone" text
procedure Citem(S, cs, cl)
  if pos(0) then
    after_next_newtext(S, Citem, [S, cs, cl])
  else if ="[" then {
    delay_newtext(S)
    with_upto_bracket(S, "", convert_bracketed, cl)
  } else {
    skipblanks(S)
    S.text(S, cl[1].ifnone)
  }
end
<<*>>=
# Emit an item's optional argument: the contents are converted by a fresh
# horizontal-mode converter, wrapped in braces to confine font changes, and
# placed between the "before" and "after" text of the current item closure.
procedure convert_bracketed(S, contents, cl)
  S.text(S, cl[1].before ||
            convert(converter("H"), "{" || contents || "}") ||
            cl[1].after)
  optwhite()
end
<<*>>=
# Register list environment env so that \begin{env} and \end{env} are
# handled by Clist and Clist_end, with the HTML tag name html as closure.
procedure listenv(env, html)
  begintab[env] := Clist
  begincl[env] := html
  endtab[env] := Clist_end
  endcl[env] := html
end
# Open a list: emit the opening tag and push the item formatting for this
# kind of list onto csclosure["item"] (<dt>/<dd> for description lists,
# <li> for the others).
procedure Clist(S, cs, cl)
  S.text(S, tag(cl))
  push(csclosure["item"],
       if cs == "description" then item_cl("<dt>", "<dd>", "<dt><dd>")
       else item_cl("<li>", "--", "<li>"))
end
# Close a list: emit the closing tag and pop the item formatting that the
# matching Clist pushed.
procedure Clist_end(S, cs, cl)
  S.text(S, endtag(cl))
  pop(csclosure["item"])
end
\subsubsection{Labels and references}
These could be done by [[argblock]], except I want to make it possible to have
different text depending on whether the references point forward or backward.
<<*>>=
global labels_seen
# Handle \label{l}: remember l in labels_seen and emit a named anchor marked
# with "[*]".  Fails without output if no argument can be read.
procedure Clabel(S, cs, cl)
  initial /labels_seen := set()
  insert(labels_seen, l := csarg(S)) | fail
  S.text(S, "<a name=\"" || l || "\"><b>[*]</b></a>")
end
# Handle \ref{l} and \pageref{l}: emit a link to anchor l, drawn as "[<-]"
# when the label has already been seen (backward reference) and as "[->]"
# otherwise.  Fails without output if no argument can be read.
procedure Cref(S, cs, cl)
  initial /labels_seen := set()
  l := csarg(S) | fail
  S.text(S, "<a href=\"#" || l || "\">[" ||
            (if member(labels_seen, l) then "<-" else "->") || "]</a>")
end
\subsubsection{Citations}
The important thing about a citation key is that it makes a hot line
to the appropriate item in the bibliography.
[[Ccite]] and [[Cbibitem]] work together to make it happen.
Optional arg might contain blanks, so it might be split, but
I assume the citation key isn't split between inputs.
<<*>>=
# Handle \cite and its relatives.  With an optional [..] argument, that text
# is collected and handed to do_cite as commentary (other new-text actions
# are delayed meanwhile); otherwise do_cite runs with no commentary.
# The bracketed_text parameter is not used here.
procedure Ccite(S, cs, cl, bracketed_text)
  if ="[" then {
    delay_newtext(S)
    with_upto_bracket(S, "", do_cite, cl)
  } else
    do_cite(S, &null, cl)
end
# Emit a citation linking to the anchor "NWcite-<key>" written by Cbibitem.
#   commentary -- text of the optional argument, or &null; when present it is
#                 converted and shown in italics after the key
# If the key is not yet available, retry on the next input.
procedure do_cite(S, commentary, cl)
  local key
  if \commentary then
    optwhite()
  if pos(0) then before_next_newtext(S, do_cite, [S, commentary, cl])
  else {
    key := csarg(S)
    \commentary := convert(converter("H"), "{" || \commentary || "}")
    S.text(S, "<b>[cite <a href=\"#NWcite-" || key || "\">" || key || "</a>" ||
              (("<i>, " || \commentary || "</i>") | "" ) ||
              "]</b>")
  }
end
<<*>>=
# Handle \bibitem.  With an optional [..] label, collect it and finish in
# finish_bibitem; otherwise number the entry with a running counter.  The
# entry becomes the anchor "NWcite-<key>" that citations link to.
procedure Cbibitem(S, cs, cl)
  local label, key
  static counter
  initial counter := 0
  if ="[" then {
    delay_newtext(S)
    with_upto_bracket(S, "", finish_bibitem, [])
  } else {
    label := "<b>[" || (counter +:= 1) || "]</b>"
    key := csarg(S) | fail
    S.text(S, "<br><a name=\"NWcite-" || key || "\">" || label || "</a> ")
  }
end
# Finish a \bibitem that had an optional label: read the key, convert the
# label text, and emit the anchor "NWcite-<key>".  Fails if no key is found.
procedure finish_bibitem(S, contents, args)
  local key, label
  optwhite()
  key := csarg(S) | fail
  label := convert(converter("H"), "{" || contents || "}")
  S.text(S, "<br><a name=\"NWcite-" || key || "\">" || label || "</a> ")
end
\subsubsection{Conditionals}
The idea here is that an \verb+\if+$\cdots$ control sequence will conditionally
ignore text, and that \verb+\fi+ restores the previous state.
To keep track of state, we have an ``if stack'' that records what
[[S.text]] should be upon encountering \verb+\else+ and \verb+\fi+.
<<other fields of state>>=
, ifstack
<<initial values for other fields of state>>=
, []
<<code to reset [[S]]>>=
# ifstack must start out as a list, because the \if procedures push onto it;
# the initial value above supplies it in the positional state(...) call.
if *S.ifstack > 0 then S.ifstack := [] # keeps GC down
What's on the ifstack is
<<*>>=
record ifrec(on_else, on_fi)
@ It's possible that one day this code will need to be updated to delay
new-text actions (and to do God knows what if
new-text actions have already been delayed).
Every \verb+\if+$\cdots$ is equivalent either to \verb+\iffalse+
or \verb+\iftrue+, so we begin by defining those, as well as \verb+\else+
and \verb+\fi+.
<<*>>=
procedure Ciffalse(S, cs, cl)
  # \iffalse: remember the current ignoring state for both \else and \fi,
  # then start ignoring.
  local prior
  #error("### \\", cs, " -> false (S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)}, ")")
  prior := S.ignoring
  push(S.ifstack, ifrec(prior, prior))
  S.ignoring := 1
end
procedure Ciftrue(S, cs, cl)
  # \iftrue: leave S.ignoring alone for now; \else will set it to 1 and
  # \fi will restore the value it has at this point.
  #error("### \\", cs, " -> true (S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)}, ")")
  push(S.ifstack, ifrec(1, S.ignoring))
end
procedure Celse(S, cs, cl)
  # \else: switch to the state recorded for the else branch in the top
  # if-stack entry.
  S.ignoring := S.ifstack[1].on_else
  #error("### \\else -> S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)})
end
procedure Cfi(S, cs, cl)
  # \fi: restore the state recorded in the top if-stack entry, then
  # discard that entry.
  S.ignoring := S.ifstack[1].on_fi
  #error("### \\fi -> S.ignoring === ", image(S.ignoring) ? {="procedure "; tab(0)})
  pop(S.ifstack)
end
Now, all that's left is to handle \verb+\newif+.
This part is all boilerplate.
<<*>>=
procedure Cnewif(S, cs, cl)
  # \newif\ifNAME: register \NAMEtrue, \NAMEfalse and \ifNAME.
  local newif, newcs
  tab(many(' \t\n'))
  if pos(0) then
    # argument not in this input: retry after the next new text
    after_next_newtext(S, Cnewif, [S, cs, cl])
  else {
    # fail rather than scan a null value when no argument can be read
    newif := csarg(S) | fail
    newif ?
      if ="\\if" & newcs := tab(many(S.csletters)) & pos(0) then {
        <<make [[newcs]] a new \verb+\if+-like thing>>
      } else
        error("\\newif argument botch: " || newif)
  }
end
And here we do the real work:
<<make [[newcs]] a new \verb+\if+-like thing>>=
# \NAMEfalse and \NAMEtrue both go through Csetif, which rebinds \ifNAME
cstab[newcs || "false"] := Csetif
cstab[newcs || "true"] := Csetif
# a freshly declared conditional starts out behaving like \iffalse
cstab["if" || newcs] := Ciffalse
<<*>>=
procedure Csetif(S, cs, cl)
  # Handles \NAMEtrue and \NAMEfalse: split cs into NAME and the
  # true/false suffix, then rebind \ifNAME to Ciftrue or Ciffalse.
  local base, tag
  if cs ? (base := tab(find("true"|"false")), tag := =("true"|"false"), pos(0)) then
    cstab["if" || base] := (if tag == "true" then Ciftrue else Ciffalse)
  else
    error("This can't happen --- setif botch (not urgent)")
end
\subsection{Reading and converting auxiliary {\LaTeX} files}
<<*>>=
procedure auxfile(cs, ext, placeholder, header, ignore)
  # Declare cs as a control sequence handled by Cauxfile, with a closure
  # holding the file extension, placeholder, header and ignore template
  # (empty when omitted).
  /ignore := ""
  cstab[cs] := Cauxfile
  csclosure[cs] := aux_cl(ext, placeholder, header, ignore)
end
[[Cauxfile]] succeeds if it finds a file, fails otherwise.
<<*>>=
record aux_cl(ext, placeholder, header, ignore)
procedure Cauxfile(S, cs, cl)
  # Convert the auxiliary file <basename of curfile>.<cl.ext> and emit the
  # result through S.text.  Returns if the file was opened; otherwise
  # emits the placeholder (if any), ignores cl.ignore, and fails.
  local auxfile, T, line      # line is now declared, so it cannot alias a global
  if auxfile := open(basename(\curfile) || "." || cl.ext) then {
    T := converter("V")
    Cmakeatletter(T)          # make '@' a letter for the auxiliary file's converter
    S.text(S, \cl.header)
    while line := read(auxfile) do
      S.text(S, convert(T, line || "\n"))
    close(auxfile)
  } else {
    S.text(S, \cl.placeholder)
    cs_ignore(S, cs, cl.ignore)
  }
  if \auxfile then return
end
<<*>>=
procedure basename(name)
  # Return name with its final ".ext" removed.  A '.' counts as an
  # extension separator only if no '/' follows it, so "../notes" and
  # "dir.d/file" are returned unchanged instead of being cut at a
  # directory dot.  A name with no extension is returned as is.
  local tail
  reverse(name) ? {
    tail := tab(upto('./'))          # reversed text after the last '.' or '/'
    if \tail & ="." then
      return reverse(tab(0))
  }
  return name
end
\subsubsection{Table of contents}
We can build a table of contents by reading the .toc file.
Sadly, I haven't figured out how to get hot links yet.
<<control-sequence assignments>>=
cstab["contentsline"] := Ccontentsline
<<*>>=
procedure Ctableofcontents(S, cs, cl)
  # \tableofcontents: switch to vertical mode, convert the auxiliary
  # file, then reset the toc level (closing any open lists).
  S.mode := "V"
  Cauxfile(S, cs, cl)
  set_toclevel(S)
end
[[set_toclevel]] manages the starting and ending of lists.
With no level argument, it resets the toc to the initial level.
<<*>>=
procedure set_toclevel(S, l)
  # Move the table of contents to nesting level l, opening or closing
  # <ul compact> lists as needed.  With l omitted, return to the level of
  # the very first call (or do nothing if no level was ever set).
  static toclevel, initiallevel

  # first call that supplies a level: remember it and open the outer list
  if /initiallevel := \l then
    S.text(S, "<ul compact>")
  # no level supplied: fall back to the initial one and close the outer list
  if /l := \initiallevel then
    S.text(S, "</ul>")
  if /l then return   # never set a level

  /toclevel := l
  while toclevel < l do {
    S.text(S, "<ul compact>")
    toclevel +:= 1
  }
  while toclevel > l do {
    S.text(S, "</ul>")
    toclevel -:= 1
  }
  return
end
Assume one table of contents per converted document.
<<*>>=
procedure Ccontentsline(S, cs, cl)
  # \contentsline{kind}{entry}{page}: emit the entry as a list item at
  # the level associated with kind, or skip it if it is deeper than the
  # tocdepth counter.  Fails for an unknown kind.
  local l, i          # both are also used by the initial chunk below
  static leveltab
  initial { <<assign numbers of sections in leveltab>> }
  # pass S to csarg, as every other caller does
  l := \leveltab[csarg(S)] | fail
  if l > \countertab["tocdepth"] then
    cs_ignore(S, cs, "{{")   # skip this one
  else {
    set_toclevel(S, l)
    S.text(S, "<li>")
    # after the entry's group closes, ignore the following brace argument
    after_next_open(S, after_next_close, [S, cs_ignore, [S, cs, "{"]])
  }
end
<<assign numbers of sections in leveltab>>=
# sectioning units from outermost to innermost
l := ["part", "chapter", "section", "subsection", "subsubsection",
"paragraph", "subparagraph"]
leveltab := table()
# list index minus 2: part -> -1, chapter -> 0, section -> 1, ...
every i := 1 to *l do
leveltab[l[i]] := i - 2 # making section level 1
\subsubsection{Counters}
<<*>>=
global countertab
procedure Csetcounter(S, cs, cl)
  # \setcounter{name}{value}: record an integer value in countertab.  If
  # the arguments cannot be read as a name and an integer, ignore two
  # brace arguments instead.
  local counter
  # pass S to csarg, as every other caller does
  (counter := csarg(S), countertab[counter] := integer(csarg(S))) |
    cs_ignore(S, cs, "{{")
end
<<initialization>>=
countertab := table()
\subsubsection{Accents}
This info is taken from the HTML RFC, section entitled
``ISO Latin~1 character entities.''
<<*>>=
global accent_name, accent_valid
<<initialization>>=
# accent_name maps an accent control sequence to the suffix of its entity
# name; accent_valid maps it to the letters for which Caccent emits an
# entity.  Accents with no accent_valid entry get the empty cset default,
# so Caccent never emits an entity for them.
accent_name := table()
accent_valid := table('')
accent_name ["`"] := "grave"
accent_valid["`"] := 'AEIOUaeiou'
accent_name ["'"] := "acute"
accent_valid["'"] := 'AEIOUYaeiouy'
accent_name ["^"] := "circ"
accent_valid["^"] := 'AEIOUaeiou'
accent_name ["\""] := "uml"
accent_valid["\""] := 'AEIOUaeiouy'
accent_name ["~"] := "tilde"
accent_valid["~"] := 'ANOano'
accent_name ["="] := "bar"
accent_name ["."] := "dot"
accent_name ["u"] := "u"
accent_name ["v"] := "v"
accent_name ["H"] := "H"
accent_name ["t"] := "t"
accent_name ["c"] := "cedil"
accent_valid["c"] := 'Cc'
accent_name ["d"] := "underdot"
accent_name ["b"] := "underbar"
Initialization calls [[accent]] to indicate that a control
sequence represents an accent.
In fact, [[accent]] is called on all keys of [[accent_name]].
<<*>>=
procedure accent(cs)
  # Declare cs to be an accent control sequence, handled by Caccent.
  cstab[cs] := Caccent
end
procedure Caccent(S, cs, cl)
  # Emit the entity "&<letter><accent name>;" when the argument is a
  # single letter listed in accent_valid[cs]; otherwise warn (once per
  # accent/argument pair) and emit the argument unaccented.
  local arg            # now declared, so it cannot alias a global
  static warned
  initial warned := table()
  arg := csarg(S) | return
  if *arg = 1 & any(accent_valid[cs], arg) then
    S.text(S, "&" || arg || accent_name[cs] || ";")
  else {
    <<warn about [[cs]] with [[arg]]>>
    S.text(S, arg)
  }
end
<<warn about [[cs]] with [[arg]]>>=
/warned[cs] := set()
if not member(warned[cs], arg) then {
write(&errout, "Warning: Can't handle \\", cs, " with arg `", arg, "'")
insert(warned[cs], arg)
\subsection{Font changes}
A font change changes the font until the next close, when we need to emit
the appropriate end tag.
<<*>>=
procedure fontchange(tex, html)
  # Declare control sequence tex to be a font change rendered with the
  # HTML element html.
  csclosure[tex] := html
  cstab[tex] := Cfontchange
end
<<*>>=
procedure Cfontchange(S, tex, html)
  # Emit the start tag now and schedule the end tag for just before the
  # next close.
  S.text(S, tag(html))
  before_next_close(S, emit_text, [S, endtag(html)])
end
\section{Implementations of declaratives}
\label{imp-decl}
\subsection{Ignoring stuff}
There are several different kinds of things that can be ignored:
ordinary arguments,
balanced-brace arguments, optional arguments, assignments (which may
include dimensions), stars, and parenthesized coordinates.
We ignore a sequence of these things by supplying a template to
[[ignore]], in which each character stands for something to be ignored.
We've already seen examples of these things in Section~\ref{cs-decls}.
We can ignore arguments of control sequences or environments.
In either case, [[cs_ignore]] does the work.
<<*>>=
procedure ignore(cs, template)
  # Declare that control sequence cs, together with the arguments
  # described by template (default: none), is ignored.
  cstab[cs] := cs_ignore
  csclosure[cs] := (\template | "")
end
procedure ignoreenv(env, template)
  # Declare that \begin{env}, together with the arguments described by
  # template (default: none), is ignored, and that \end{env} does nothing.
  begintab[env] := cs_ignore
  begincl[env] := (\template | "")
  endtab[env] := do_nothing
end
Because ignoring may span many inputs, all [[cs_ignore]] does is set things
up to call [[do_ignore]].
The major setup is saving the current value of [[S.ignoring]] and then setting it to 1.
Oh, and it converts an integer template
into that many arguments, for historical reasons.
<<*>>=
procedure cs_ignore(S, cs, template, proc, args)
  # Start ignoring the items described by template.  The current value
  # of S.ignoring is handed to do_ignore, which restores it at the end.
  # An integer template n stands for n brace arguments.
  local previous
  previous := S.ignoring
  S.ignoring := 1
  if type(template) == "integer" then
    template := repl("{", template)
  return do_ignore(S, template, previous, proc, args)
end
Some things are easily ignored (partly because we assume they don't
span inputs). For others, we have special procedures.
The brace-ignoring stuff uses the open and close hooks, because braces
can be nested deeply.
If non-null, [[proc]] is applied to [[args]] after everything is ignored.
<<*>>=
procedure do_ignore(S, template, saved_ignore, proc, args)
  # Ignore the items named by template, one character per item:
  #   "{"  balanced-brace argument      "A"  ordinary argument
  #   "["  optional argument            "C"  optional argument, copied
  #   "="  assignment                   "*"  optional star
  #   "("  parenthesized coordinates
  # When the template is used up, S.ignoring is reset to saved_ignore and
  # proc (if non-null) is applied to args.
  if *template > 0 then
    if optwhite() & pos(0) then
      # out of input: resume with the same template on the next new text
      after_next_newtext(S, do_ignore, [S, template, saved_ignore, proc, args])
    else
      case template[1] of {
        "{" : { S.ignoring := 1
                after_next_open(S, ignore_til_close,
                                [S, template[2:0], saved_ignore, proc, args])
              }
        "A" : { csarg(S)  # had better be in one input
                do_ignore(S, template[2:0], saved_ignore, proc, args)
              }
        "[" : if optwhite() & ="[" then {
                delay_newtext(S)
                with_upto_bracket(S, "", ignore_bracket_plus,
                                  [S, template[2:0], saved_ignore, proc, args])
              } else
                do_ignore(S, template[2:0], saved_ignore, proc, args)
        "C" : # a total cheat, means ``copy optional arg''
              if optwhite() & ="[" then {
                S.ignoring := &null
                delay_newtext(S)
                with_upto_bracket(S, "", copy_bracket_plus,
                                  [S, template[2:0], saved_ignore, proc, args])
              } else
                do_ignore(S, template[2:0], saved_ignore, proc, args)
        "=" : { delay_newtext(S)
                eat_assignment(S, do_ignore, [S, template[2:0], saved_ignore, proc,args])
              }
        "*" : { (="*", optwhite())
                do_ignore(S, template[2:0], saved_ignore, proc, args)
              }
        "(" : { (="(", tab(upto(')')), =")", optwhite())
                do_ignore(S, template[2:0], saved_ignore, proc, args)
              }
        # An unrecognized character used to match no clause, which left
        # S.ignoring set and never ran the continuation.  Skip it and go on.
        default : { write(&errout, "Warning: unknown ignore-template character `",
                          template[1], "' skipped")
                    do_ignore(S, template[2:0], saved_ignore, proc, args)
                  }
      }
  else {
    S.ignoring := saved_ignore
    (\proc)!(\args)
  }
end
procedure ignore_til_close(S, template, saved_ignore, proc, args)
  # Open hook for a "{" template item: schedule do_ignore, with the rest
  # of the template, to run before the next close.
  before_next_close(S, do_ignore, [S, template, saved_ignore, proc, args])
end
Finally, at the end of an ignored environment, do nothing.
<<*>>=
procedure do_nothing(S, cs, cl)
  # Installed by ignoreenv as the end action of an ignored environment.
  return
end
\subsubsection{Parsing bracketed (optional) arguments}
We may have to deal with optional arguments that are split across lines.
We pass in a continuation for the bracket.
This is a lot like gobbling to a newline, which we had to do with a comment.
As in the other case, we do something stupid if the bracket is
protected (e.g. by a backslash or comment char).
<<*>>=
procedure with_upto_bracket(S, bracketed_text, proc, args)
  # Accumulate text up to the next "]".  If the input runs out first,
  # reschedule with what has been gathered so far; otherwise consume the
  # bracket and call proc(S, text, args).
  bracketed_text ||:= tab(upto(']') | 0)
  if pos(0) then
    before_next_newtext(S, with_upto_bracket, [S, bracketed_text, proc, args])
  else {
    ="]"
    undelay_newtext(S)
    (\proc)(S, bracketed_text, args)
  }
  return
end
To ignore brackets:
<<*>>=
procedure ignore_bracket_plus(S, contents, args)
  # Bracket continuation for the "[" template item: drop contents and
  # carry on with do_ignore, whose argument list is args.
  do_ignore!args
end
@ and to copy them
<<*>>=
procedure copy_bracket_plus(S, contents, args)
  # Bracket continuation for the "C" template item: copy the converted
  # optional argument to the output, then carry on with do_ignore, whose
  # argument list is args.
  #
  # args[3] is the saved_ignore argument to do_ignore, i.e. an earlier
  # value of S.ignoring (cs_ignore saves it and sets S.ignoring to 1).
  # The old code invoked that value as if it were the text procedure:
  # a run-time error when it is null, and a silent no-op when it is 1.
  # Emit through S.text instead, and only when the enclosing context was
  # not itself ignoring.
  *args >= 3 | fail
  if /args[3] then
    S.text(S, convert(converter("H"), "{" || contents || "}"))
  do_ignore!args
end
\subsubsection{Ignoring assignments}
Assignments are tricky because they might involve numbers, control
sequences, dimensions, or even glue.
We approximate the syntax from page 275 in the \TeX book.
<<*>>=
procedure eat_assignment(S, proc, args)
  # Skip the right-hand side of an assignment: optional "=", then glue,
  # or a number optionally followed by a control sequence, or a control
  # sequence alone.  Afterwards apply proc to args.
  static numeric
  initial numeric := &digits ++ '.,+-'
  optwhite()
  ="="           # so what if we swallow multiple = signs
  optwhite()
  if pos(0) then {
    # value not in this input: try again when more text arrives
    before_next_newtext(S, eat_assignment, [S, proc, args])
    return
  }
  if not glue() then {
    if any(numeric) then {
      tab(many(numeric))
      optwhite()
      if ="\\" then
        tab(many(S.csletters)) | move(1)
      # else assume assignment of the form \hangafter=2
    }
    else if ="\\" then
      tab(many(S.csletters)) | move(1)
  }
  undelay_newtext(S)
  (\proc)!args
end
<<*>>=
procedure dimen()
  # Match a dimension at the current scanning position: optional sign,
  # number, optional "true", and a mandatory unit.
  static number_chars
  initial number_chars := &digits ++ '.,'
  suspend (optwhite(),
           if any('+-') then (move(1), optwhite()) else "",
           tab(many(number_chars)), optwhite(),
           (="true", optwhite()) | &null,
           =("em"|"ex"|"pt"|"pc"|"in"|"bp"|"cm"|"mm"|"dd"|"cc"|"sp"|"mu"))
end
<<*>>=
procedure glue()
  # Match glue: a dimension, optionally followed by "plus <dimen>" and
  # "minus <dimen>".
  suspend (dimen(),
           (optwhite(), ="plus",  dimen()) | "",
           (optwhite(), ="minus", dimen()) | "")
end
\subsection{Substitution}
\subsubsection{Simple substitution for a single control sequence}
Even simple substitution isn't so simple, because in addition to the
HTML that we substitute for the {\TeX}, we can also supply a template
of stuff to be ignored (like the optional argument to \verb+\\+).
<<*>>=
procedure substitution(tex, html, ignore_template)
  # Declare that control sequence tex is replaced by the text html, after
  # which the items described by ignore_template (default: none) are
  # ignored.
  # ignore mode for now
  /ignore_template := ""
  cstab[tex] := Cemit_ig
  csclosure[tex] := emit_ig_cl(html, ignore_template)
end
The closure contains HTML to be written and a template to be ignored.
<<*>>=
record emit_ig_cl(html, template)
procedure Cemit_ig(S, cs, cl)
  # Emit the closure's HTML, then ignore whatever its template describes.
  S.text(S, cl.html)
  if *cl.template > 0 then
    cs_ignore(S, cs, cl.template)
end
\subsubsection{Substitution for environments}
The [[envblock]] procedure has two forms:
\begin{itemize}
\item
{}[[envblock(]]{\it environment}, {\it tag}[[)]] simply uses
begin- and end-{\it tag} in place of the environment.
\item
{}[[envblock(]]{\it environment}, {\it left}, {\it right}, {\it
ignore}[[)]]
puts the {\it left} text at the beginning of the environment, the {\it
right} text at the end, plus at the beginning of the environment it
ignores the arguments described by {\it ignore}.
\end{itemize}
It's easier to implement than to describe.
<<*>>=
procedure envblock(env, left, right, ignore_template)
  # envblock(env, tag): wrap the environment in <tag> ... </tag>.
  # envblock(env, left, right, ignore): emit left at \begin and right at
  # \end, ignoring the arguments described by ignore after \begin.
  local opening, closing
  /ignore_template := ""
  if /right then {
    opening := tag(left)
    closing := endtag(left)
  }
  else {
    opening := left
    closing := right
  }
  begintab[env] := Cemit_ig
  begincl[env] := emit_ig_cl(opening, ignore_template)
  endtab[env] := Cemit
  endcl[env] := closing
end
@ [[Cemit]] emits text with nothing to ignore.
<<*>>=
procedure Cemit(S, cs, cl)
  # The closure is the text itself; emit it.
  S.text(S, cl)
end
\subsubsection{Substitution around arguments of control sequences}
These substitutions place tags at the beginning and end of arguments
to control sequences, instead of surrounding the contents of an
environment.
For example, they specify how to convert [[\section{...}]] to
[[<h1>...</h1>]] and so forth.
The calling convention is as for [[envblock]].
<<*>>=
record blockpair(left, right, ignore)
procedure argblock(tex, html, right, ignore)
  # Same calling convention as envblock, but the text surrounds the
  # argument of control sequence tex rather than an environment body.
  local pair
  /ignore := ""
  if /right then
    pair := blockpair(tag(html), endtag(html), ignore)
  else
    pair := blockpair(html, right, ignore)
  cstab[tex] := Cblock
  csclosure[tex] := pair
end
@ There is a fine point; control sequences labelled with [[argblockv]]
should put the converter into vertical mode.
<<*>>=
procedure argblockv(tex, html, right, ignore)
  # As argblock, but the handler is CblockV, which first sets the
  # converter's mode to "V".
  argblock(tex, html, right, ignore)
  cstab[tex] := CblockV
end
<<*>>=
procedure Cblock(S, cs, cl, done_ignoring)
  # Surround the argument of cs with cl.left and cl.right.  cl.ignore, if
  # non-empty, is ignored first; Cblock is then re-entered with
  # done_ignoring set.
  if /done_ignoring & *cl.ignore > 0 then {
    cs_ignore(S, cs, cl.ignore, Cblock, [S, cs, cl, 1])
  }
  else if pos(0) then {
    # argument not in this input
    # NOTE(review): the retry goes through do_cs without done_ignoring;
    # confirm that cl.ignore is not applied a second time.
    after_next_newtext(S, do_cs, [S, cs, cl])
  }
  else if match("{") then {
    # braced argument: emit cl.right just before the group closes
    S.text(S, cl.left)
    after_next_open(S, before_next_close, [S, emit_text, [S, cl.right]])
  }
  else {
    S.text(S, cl.left || csarg(S) || cl.right)
  }
  return
end
<<*>>=
procedure CblockV(S, cs, cl)
  # Cblock, preceded by a switch to mode "V".
  S.mode := "V"
  Cblock(S, cs, cl)
  return
end
\subsection{Table environments}
For tables, we not only have an HTML tag, we also supply some text
for the ampersand.
[[args]] is a template describing the arguments to the environment,
which are ignored.
<<*>>=
record table_closure(args, amp, html)
procedure table_env(env, args, amp, html)
  # Declare env to be a table environment.  args is the template of
  # arguments to ignore, amp the text supplied for the ampersand, and
  # html the HTML element.  endcl[env] starts as an empty list; Ctable
  # pushes onto it and Ctable_end pops.
  begintab[env] := Ctable
  begincl[env] := table_closure(args, amp, html)
  endtab[env] := Ctable_end
  endcl[env] := []
end
<<*>>=
procedure Ctable(S, env, cl)
  # Begin a table environment: save the current ampersand text on
  # endcl[env], install the table's own, emit the start tag (if any),
  # and ignore the environment's arguments.
  push(endcl[env], S.ampersand)
  S.ampersand := cl.amp
  S.text(S, tag(\cl.html))
  cs_ignore(S, env, cl.args)
end
procedure Ctable_end(S, env, cl)
  # End a table environment: restore the saved ampersand text and emit
  # the end tag (if the environment has an HTML element).
  S.ampersand := pop(cl)
  S.text(S, endtag(\begincl[env].html))
end
\subsection{Control-sequence assignment}
This procedure is available to be used for dynamic assignment.
One day we might use it to parse \verb+\let+ as well.
<<*>>=
procedure let(lhs, rhs)
  # Give control sequence lhs the same handler and closure as rhs.
  csclosure[lhs] := csclosure[rhs]
  cstab[lhs] := cstab[rhs]
end
\section{HTML formatting}
\label{html-format}
First, generic procedures used to create beginning and ending tags.
<<*>>=
procedure tag(html)
  # Start tag for element html.
  return "<" || html || ">"
end

procedure endtag(html)
  # End tag for element html.
  return "</" || html || ">"
end
Next, a gazillion formatting procedures.
<<*>>=
procedure Ccomment(S)
  # Emit the pending comment text as an HTML comment, writing every "--"
  # inside it as "- - ", then clear S.comment.
  if *S.comment = 0 then return
  S.text(S, "<!--")
  S.comment ? {
    while S.text(S, tab(find("--"))) do {
      move(2)
      S.text(S, "- - ")
    }
    S.text(S, tab(0))
  }
  S.text(S, "-->")
  S.comment := ""
  return
end
<<*>>=
procedure Cparagraph(S)
  # A paragraph break becomes <p>.
  S.text(S, "<p>")
end
<<*>>=
procedure Cmath(S)
  # Math opens a group and is set in italics.
  <<take open-group actions>>
  S.text(S, "<i>")
end

procedure Cmath_end(S)
  # Leave italics, then close the group.
  S.text(S, "</i>")
  <<take close-group actions>>
end
<<*>>=
procedure Cdisplaymath(S)
  # Display math opens a group and is set as an italic block quote.
  <<take open-group actions>>
  S.text(S, "<blockquote><i>")
end

procedure Cdisplaymath_end(S)
  # Leave the italic block quote, then close the group.
  S.text(S, "</i></blockquote>")
  <<take close-group actions>>
end
<<*>>=
procedure Cmakeatletter(S)
  # Add '@' to the set of control-sequence letters.
  S.csletters ++:= '@'
end

procedure Cmakeatother(S)
  # Remove '@' from the set of control-sequence letters.
  S.csletters --:= '@'
end
Approximate \verb+\kill+ by eliminating text.
<<*>>=
procedure Ckill(S, cs, cl)
  # \kill: empty S.the_text.
  S.the_text := ""
end
\section{Support for adding control sequences dynamically}
The idea is to use formal comments of the form:
\begin{quote}
\verb+% l2h function arg arg ...+
\end{quote}
These comments have the same effect as the procedure calls in
the chunk [[<<control-sequence assignments>>]].
Our first step is to create a table with the names of the functions we
recognize.
Ordinarily this table would be distributed, but I created it after the
fact with a little quick Unix pipeline.
<<*>>=
global csfunctions
<<initialization>>=
csfunctions := table()
<<assign to dynamic-add table>>=
csfunctions["argblock"] := argblock
csfunctions["argblockv"] := argblockv
csfunctions["envblock"] := envblock
csfunctions["fontchange"] := fontchange
csfunctions["ignore"] := ignore
csfunctions["ignoreenv"] := ignoreenv
csfunctions["let"] := let
csfunctions["listenv"] := listenv
csfunctions["substitution"] := substitution
Now, the tough issue is how to parse arguments. I'm going to try the
following initial strategy: arguments are separated by spaces.
To put a space within an argument, use \verb+#+. There is no way to
put a \verb+#+ within an argument.
<<*>>=
procedure parse_dynamic_add(S)
  # Parse a formal comment "l2h function arg arg ..." (or "sl2h ...") in
  # the current scanning subject and call the named function.  Arguments
  # are separated by blanks or tabs; "#" inside an argument stands for a
  # space.  Fails if the function name is not in csfunctions.
  local p, a           # now declared, so they cannot alias globals
  if (optwhite(), =("l2h"|"sl2h"), skipwhite(),
      p := tab(upto(' \t')), <<make [[p]] a good function or warn and [[fail]]>>,
      skipwhite(), any(~'\n')) then {
    a := []
    while any(~'\n') do {
      put(a, map(tab(upto(' \t\n') | 0), "#", " "))
      skipwhite()
    }
    p!a
  }
  return
end
<<make [[p]] a good function or warn and [[fail]]>>=
((p := \csfunctions[p]) |
{ dynamic_warn(p); fail })
<<*>>=
procedure dynamic_warn(p)
  # Warn on standard error, once per name, that p is not a recognized
  # "% l2h" function.
  static already_warned
  initial already_warned := set()
  if member(already_warned, p) then fail
  write(&errout, "Warning: % l2h ", p, " not recognized -- ignored")
  insert(already_warned, p)
end
@
\section{Miscellaneous utilities}
[[optwhite]] skips and returns optional white space.
<<*>>=
procedure optwhite()
  # Skip a run of blanks and tabs if there is one and produce it;
  # otherwise produce the empty string.
  suspend tab(many(' \t')) | ""
end
@ [[skipwhite]] insists that there must be some white space.
<<*>>=
procedure skipwhite()
  # Skip a run of blanks and tabs and produce it; fail if there is none.
  suspend tab(many(' \t'))
end
\section{Main program for a noweb filter}
First, this is how we use the converter as a noweb filter.
<<l2h.icn>>=
<<*>>
procedure main(args)
  # noweb filter: handle the options, pass every line of standard input
  # through filter, then report unknown control sequences and
  # environments if their sets are non-null.
  local line, arg      # arg is now declared, so it cannot alias a global
  every arg := !args do
    case arg of {
      "-show-unknowns" : show_unknowns := 1
      default : write(&errout, "l2h filter: unknown arg ", image(arg))
    }
  while line := read() do
    apply(filter, line)
  warn_unknown(\unknown_set, "control sequences", "\\")
  warn_unknown(\unknown_envs, "environments", "{", "}")
end
procedure apply(pass, line)
  # Split a line of the form "@name rest" and call pass(name, rest);
  # rest is null when nothing follows the name.
  line ? (="@" &
          pass(tab(upto(' ')|0), if =" " then tab(0) else &null))
end
This is noweb filter machinery. I really ought to coordinate quoted text
with the converter (so it always shows up in the right place),
but so far I'm too lazy.
<<l2h.icn>>=
global curfile, curline
procedure filter(name, arg)
  # Handle one "@name arg" line of filter input.  Text and newlines are
  # converted with S unless a code chunk is open; definition and use
  # names are converted with C; everything else is copied through.
  static S, C, code
  initial { S := converter("V"); C := converter("H") }
  case name of {
    "begin"        : {<<out>>; if match("code ", arg) then code := 1}
    "end"          : {<<out>>; code := &null; S.mode := "V"}
    "quote"        : { outtext("\0" ? convert(S)) }
    "endquote"     : { outtext("\1" ? convert(S)) }
    "file"         : {<<out>>; curfile := arg; curline := 1}
    "line"         : {<<out>>; curline := integer(arg)}
    "defn" | "use" : { write("@", name, " ", "{" || arg || "}" ? convert(C)); reset(C) }
    "text"         : {if \code then <<out>> else outtext(arg ? convert(S))}
    "nl"           : {if \code then <<out>> else outtext("\n" ? convert(S)); curline +:= 1}
    default        : {<<out>>}
  }
  return
end
<<out>>=
write("@", name, (" " || \arg) | "")
<<l2h.icn>>=
procedure outtext(s)
  # Write s as filter output: a newline becomes @nl, the marker
  # characters "\0" and "\1" become @quote and @endquote, and every other
  # run of characters becomes an @text line.
  s ?
    while not pos(0) do
      if ="\n" then
        write("@nl")
      else if ="\0" then
        write("@quote")
      else if ="\1" then
        write("@endquote")
      else
        write("@text ", tab(upto('\n\0\1') | 0))
  return
end
<<l2h.icn>>=
procedure error(args[])
  # Write args to standard error, prefixed by the current file (if known)
  # and the current line number.
  local prefix
  prefix := [&errout, (\curfile || ", ") | "", "line ", curline, ": "]
  return write!(prefix ||| args)
end
\section{Main program for a simple converter}
<<sl2h.icn>>=
<<*>>
global curfile
procedure main(args)
  # Standalone converter: convert each file named on the command line (or
  # standard input if none is named) and write the HTML to standard
  # output.  Option: -show-unknowns.
  local S, arg, f, line
  S := converter("V")
  every arg := !args do
    if arg[1] == "-" then
      case arg of {
        "-show-unknowns" : show_unknowns := 1
        default : write(&errout, "Warning: unrecognized option ", arg)
      }
    else if f := open(curfile <- arg) then {
      # curfile <- arg is undone if open fails, so curfile only ever
      # names a file that was actually opened
      while line := read(f) do writes(convert(S, line || "\n"))
      close(f)     # the file used to be left open
    }
    else
      write(&errout, "Error: Can't open file ", arg)
  if /curfile then
    while line := read() do writes(convert(S, line || "\n"))
  warn_unknown(\unknown_set, "control sequences", "\\")
  warn_unknown(\unknown_envs, "environments", "{", "}")
end
\section{Chunks}
\nowebchunks
\begin{multicols}{2}[\section{Index}]
\nowebindex
\end{multicols}
\end{document}